#############
#############
#############
## Stefan Müller and Sven-Oliver Proksch: 
## Nostalgia in European Party Politics:
## A Text-Based Measurement Approach
## British Journal of Political Science
##
## Script returns all tables and plots 
## included in the main paper
#############
#############
#############

# load packages
library(dplyr)      # CRAN v1.1.2 
library(texreg)     # CRAN v1.38.6
library(ggplot2)    # CRAN v3.4.2 
library(lme4)       # CRAN v1.1-34 
library(cowplot)    # CRAN v1.1.1 
library(ggcorrplot) # CRAN v0.1.4 
library(xtable)     # CRAN v1.8-4 
library(tidyr)      # CRAN v1.3.0 
library(readr)      # CRAN v2.1.4
library(stringr)    # CRAN v1.5.0

# If the code does not run, one or more packages may have been 
# updated, which may result in errors or conflicts. You can solve this issue
# by installing the package version listed above or by using the 
# groundhog package:
# after installing groundhog using install.packages("groundhog")
# change library(name_of_package) to
# groundhog::groundhog.library(name_of_package, date = "2023-09-04")
# Instead of adjusting the library() function for each package, 
# you can adjust all of them at once using
# the following syntax:
# groundhog.library("
#                   library('pkgA')
#                   library('pkgB')
#                   library('pkgC')", date = "2023-09-04")
# More details are available at: https://groundhogr.com/using/

# custom ggplot2 theme (provides theme_baser used for the figures)
source("function_theme_base.R")

# manifesto-level data: one observation per manifesto
dat_manifestolevel_raw <- readRDS("data_nostalgia_manifestolevel.rds")

# sentence-level data used to compare hand-coding, classifiers,
# and GPT 3.5 (see Figure 2)
dat_joined <- read_csv("data_coded_gpt-3.5_clean.csv")

# turn the GPT rating into a binary indicator:
# a sentence is nostalgic if its (integer) score is at least 5
dat_joined <- dat_joined |> 
    mutate(score_gpt = as.integer(score_gpt),
           nostalgia_sentence_gpt = ifelse(score_gpt >= 5, 1, 0))

# manifesto-level variables that are merged with dat_joined later on;
# the identifier is copied to manifesto_id_new
dat_manifestolevel_sents <- dat_manifestolevel_raw |> 
    mutate(manifesto_id_new = manifesto_id) |> 
    select(manifesto_id_new, 
           n_sentences_manifesto, 
           edate,
           nost_sents_sum_base, 
           nost_sents_sum_emb_pos,
           nost_sents_sum_pos, 
           partyname, 
           countryname,
           party_family_recoded, 
           loglibcons)


# aggregate the sentence-level codings to one row per manifesto
dat_sum <- dat_joined |> 
    # no `by` given: joins on all columns the two data frames share
    left_join(dat_manifestolevel_sents) |> 
    # counts taken from the manifesto-level data get the same "sum_"
    # prefix as the counts computed from the coded sentences below
    rename(sum_nostalgia_base = nost_sents_sum_base,
           sum_nostalgia_base_pos = nost_sents_sum_pos,
           sum_nostalgia_emb_pos = nost_sents_sum_emb_pos) |> 
    group_by(manifesto_id_new, partyname,
             countryname, edate,
             sum_nostalgia_base, 
             sum_nostalgia_base_pos, 
             sum_nostalgia_emb_pos,
             party_family_recoded, loglibcons,
             n_sentences_manifesto) |> 
    # number of sentences flagged as nostalgic by each approach
    summarise(
        sum_nostalgia_coded_at_least_one = sum(nostalgia_coded_at_least_one),
        sum_nostalgia_coded_both = sum(nostalgia_coded_both),
        sum_nostalgia_bert = sum(nostalgia_sentence_bert),
        sum_nostalgia_gpt = sum(nostalgia_sentence_gpt),
        sum_nostalgia_svm = sum(nostalgia_sentence_svm),
        sum_nostalgia_emb = sum(nostalgia_sentence_dummy_emb)
    ) |> 
    ungroup() |> 
    # express every count as nostalgic sentences per 1,000 sentences
    mutate(
        nostalgia_coded_at_least_one = sum_nostalgia_coded_at_least_one / n_sentences_manifesto * 1000,
        nostalgia_coded_both = sum_nostalgia_coded_both / n_sentences_manifesto * 1000,
        nostalgia_gpt = sum_nostalgia_gpt / n_sentences_manifesto * 1000,
        nostalgia_bert = sum_nostalgia_bert / n_sentences_manifesto * 1000,
        nostalgia_emb = sum_nostalgia_emb / n_sentences_manifesto * 1000,
        nostalgia_svm = sum_nostalgia_svm / n_sentences_manifesto * 1000,
        nostalgia_base = sum_nostalgia_base / n_sentences_manifesto * 1000,
        nostalgia_base_pos = sum_nostalgia_base_pos / n_sentences_manifesto * 1000,
        nostalgia_emb_pos = sum_nostalgia_emb_pos / n_sentences_manifesto * 1000
    ) |> 
    select(starts_with("nostalgia_"), 
           starts_with("sum_"), 
           party_family_recoded, 
           loglibcons, 
           edate,
           partyname, 
           countryname)


# calculate correlations between the manifesto-level nostalgia measures
# (per 1,000 sentences); column names double as axis labels in Figure 2
dat_sum_cor <- dat_sum |> 
    select(`Method 1\n(Dict.)` = nostalgia_base,
           `Method 2\n(Dict + Emb.)` = nostalgia_emb,
           `Method 3\n(Dict. + Sentiment)`= nostalgia_base_pos,
           `Method 4\n(Dict. + Emb. + Sentiment)`= nostalgia_emb_pos, 
           `Method 5\n(SVM)`= nostalgia_svm, 
           `Method 6\n(DistilBERT)`= nostalgia_bert,
           `LLM: GPT 3.5`= nostalgia_gpt,
           `Human Coding (>=1 Coder)` = nostalgia_coded_at_least_one,
           `Human Coding (2 Coders)` = nostalgia_coded_both)

names(dat_sum_cor)

# pairwise deletion: each correlation uses all manifestos for which
# both measures are available
cors_nost <- cor(dat_sum_cor, use = "pairwise.complete.obs")

cors_nost

# correlation matrix with printed coefficients; all tiles are white,
# so only the numbers carry information
p_corr <- ggcorrplot(cors_nost,
                     #type = "upper",
                     lab = TRUE,
                     show.diag = TRUE,
                     lab_size = 4,
                     lab_col = "grey20",
                     #colors = c("white", "white", "grey60"),
                     colors = c("white", "white", "white"),
                     legend.title = "Correlation",
                     ggtheme = theme_baser) +
    theme(legend.position = "none") +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5))

p_corr

# Figure 2 ----
# the empty second panel adds white space to the right of the matrix;
# the combined plot is stored and handed to ggsave() explicitly so that
# the saved figure does not depend on whatever last_plot() returns
fig_02 <- cowplot::plot_grid(p_corr, NULL, nrow = 1,
                             rel_widths = c(0.8, 0.2))
fig_02

ggsave("fig_02.pdf", plot = fig_02,
       width = 12, height = 6)
ggsave("fig_02.png", plot = fig_02, dpi = 600,
       width = 12, height = 6)


# working copy of the manifesto-level data for the regression models
dat_reg <- dat_manifestolevel_raw

# range of the loglibcons variable among observations with
# non-missing lagged unemployment (i.e. those entering our models)
dat_reg |> 
    filter(!is.na(unemp_lag1)) |> 
    summarise(min_libcons = min(loglibcons),
              max_libcons = max(loglibcons))

# categorical predictors and the baseline category each one should use
baseline_levels <- c(party_family_recoded = "Nationalist",
                     cabinet_status_lag2 = "Opposition",
                     populism_popu_list_categories = "Other")

# convert to factor and move the baseline category to the front
for (var_name in names(baseline_levels)) {
    dat_reg[[var_name]] <- relevel(factor(dat_reg[[var_name]]),
                                   ref = baseline_levels[[var_name]])
}

# decade is only converted to a factor; its level order is left as is
dat_reg$decade <- factor(dat_reg$decade)



# overview of party-level nostalgia for Figure 3:
# (1) keep parties with at least two manifestos,
# (2) average the DistilBERT measure per party,
# (3) average the party means per country,
# (4) keep the party with the highest and the party with the lowest
#     mean in each country
dat_minmax_bert <- dat_reg |> 
    group_by(region, countryname, party, party_family_recoded,
             partyname, partyabbrev, populism_popu_list_categories) |> 
    filter(n() >= 2) |> 
    summarise(mean_party = mean(nostalgia_sentences_per_1000_bert)) |> 
    group_by(countryname) |> 
    mutate(mean_nostalgia = mean(mean_party)) |> 
    arrange(countryname, desc(mean_party)) |> 
    filter(row_number() %in% c(1, n()))

# country-level averages (one row per country)
country_means <- dat_minmax_bert |> 
    select(mean_nostalgia, countryname) |> 
    distinct()

# highest levels
country_means |> 
    arrange(desc(mean_nostalgia))

# lowest levels
country_means |> 
    arrange(mean_nostalgia)

# clean up party abbreviations and shorten party family labels
# for the text annotations in Figure 3
dat_minmax_bert_clean <- dat_minmax_bert |> 
    # hand-picked abbreviations for individual parties (by party code)
    mutate(partyabbrev = ifelse(countryname == "Norway" & party == "12221", "SF",
                                partyabbrev),
           partyabbrev = ifelse(countryname == "France" & party == "31621", "UMP",
                                partyabbrev),
           partyabbrev = ifelse(countryname == "Ireland" & party == "53620", "FF",
                                partyabbrev),
           partyabbrev = ifelse(countryname == "Latvia" & party == "87071", "LNNK",
                                partyabbrev),
           partyabbrev = ifelse(party == "51420", "Lib P.", partyabbrev)) |> 
    # replace the non-ASCII abbreviation; all others are kept as they are
    mutate(partyabbr_recoded = dplyr::recode(partyabbrev, 
                                             "KSČM" = "KSCM")) |> 
    # shorten party family names
    mutate(party_family_recoded = str_replace_all(party_family_recoded, "Democratic", "Dem."),
           party_family_recoded = str_replace_all(party_family_recoded, "Christian", "Chr."),
           party_family_recoded = str_replace_all(party_family_recoded, "Nationalist", "Nat."),
           party_family_recoded = str_replace_all(party_family_recoded, "Conservative", "Cons."))


# NOTE(review): the original pipeline also tried to set the family label
# of GERB to "   Christian Dem.", but tested partyabbr_recoded == "GERB"
# *after* that abbreviation had been padded to "    GERB", so the
# condition could never be TRUE. The unreachable step is dropped here,
# which leaves the rendered labels exactly as before. If a shifted family
# label for GERB is wanted, add the override before the padding step.
dat_minmax_bert_clean <- dat_minmax_bert_clean |>
    # put the (shortened) party family in parentheses
    mutate(party_family_recoded = dplyr::recode(party_family_recoded, "Other" = "Oth."),
           party_family_recoded = paste0("(", party_family_recoded, ")")) |> 
    # blanks pad the label of GERB; presumably this shifts the text in
    # Figure 3 to avoid overlapping labels -- TODO confirm
    mutate(partyabbr_recoded = ifelse(partyabbr_recoded == "GERB", "    GERB", 
                                      partyabbr_recoded),
           partyabbr_recoded = ifelse(party == "41111", "GP", partyabbr_recoded),
           partyabbr_recoded = ifelse(party == "15450", "LibRefP", partyabbr_recoded),
           partyabbr_recoded = ifelse(party == "22711", "CD", partyabbr_recoded)) |> 
    # trailing blanks pad the family label of DPS (same purpose as above)
    mutate(party_family_recoded = ifelse(partyabbr_recoded == "DPS", "(Oth.)        ", 
                                         party_family_recoded))


# fix the order of the regions (used as facet rows in Figure 3)
dat_minmax_bert_clean$region <- factor(dat_minmax_bert_clean$region,
                                       levels = c("Central and Eastern Europe",
                                                  "Southern Europe",
                                                  "Western Europe",
                                                  "Northern Europe"))

# Figure 3 ----
# one row per country (ordered by the country mean): the two retained
# parties are shown as points joined by a dashed line, the country
# mean as a square; facets group countries by region
p_fig03 <- ggplot(dat_minmax_bert_clean, 
                  aes(x = mean_party, 
                      y = reorder(countryname, mean_nostalgia))) +
    geom_line(aes(group = countryname), 
              colour = "grey40", linetype = "dashed") +
    geom_point(colour = "black", size = 2) +
    # party abbreviation above the point ...
    geom_text(aes(label = partyabbr_recoded),
              colour = "grey40", nudge_y = 0.25, size = 4.3) +
    # ... and party family below it
    geom_text(aes(label = party_family_recoded),
              colour = "grey40", nudge_y = -0.25, size = 3) +
    # country-level mean
    geom_point(aes(x = mean_nostalgia), 
               shape = 15, colour = "black", size = 3) +
    facet_grid(region ~ ., scales = "free", space = "free") +
    scale_x_continuous(limits = c(-10, 130),
                       breaks = seq(0, 120, by = 20)) +
    labs(x = "Party-Level Nostalgia (DistilBERT)",
         y = NULL) 

p_fig03

ggsave("fig_03.pdf", plot = p_fig03,
       width = 9, height = 12)
ggsave("fig_03.png", plot = p_fig03, dpi = 600,
       width = 9, height = 12)


# rescale variables for regression models
#
# z-transforms a numeric vector: subtracts its mean and divides by its
# standard deviation.
#
#   x      numeric vector
#   na.rm  if TRUE, missing values are ignored when computing the mean
#          and the standard deviation (they remain NA in the result).
#          The default FALSE keeps the existing behaviour: a single NA
#          in x turns every element of the result into NA.
#
# Returns a numeric vector of the same length as x.
rescale_var <- function(x, na.rm = FALSE) {
    (x - mean(x, na.rm = na.rm)) / sd(x, na.rm = na.rm)
}

# test function
rescale_var(x = c(1, 3, 5, 6))


# restrict to manifestos with a DistilBERT score and drop any grouping
dat_reg <- dat_reg |> 
    ungroup() |> 
    filter(!is.na(nostalgia_sentences_per_1000_bert))

# z transform the six nostalgia measures (dependent variables of Table 1)
dat_reg <- dat_reg |> 
    mutate(
        nostalgia_base_rescale = rescale_var(nostalgia_sentences_per_1000),
        nostalgia_base_sent_rescale = rescale_var(nostalgia_sentences_per_1000_sentiment),
        nostalgia_emb_rescale = rescale_var(nostalgia_sentences_per_1000_emb),
        nostalgia_emb_sent_rescale = rescale_var(nostalgia_sentences_per_1000_sentiment_emb),
        nostalgia_bert_rescale = rescale_var(nostalgia_sentences_per_1000_bert),
        nostalgia_svm_rescale = rescale_var(nostalgia_sentences_per_1000_svm)
    )



# main regression models, reported in Table 01

# baseline specification: standardized dictionary measure regressed on
# cultural conservatism, cabinet status, vote share and lagged
# unemployment, with random intercepts for elections, countries, parties
lm_rescale_base <- lmer(
    nostalgia_base_rescale ~
        loglibcons + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# same specification for the other standardized measures:
# only the dependent variable is swapped
lm_rescale_emb <- update(lm_rescale_base, nostalgia_emb_rescale ~ .)

lm_rescale_emb_sent <- update(lm_rescale_base, nostalgia_emb_sent_rescale ~ .)

lm_rescale_sent <- update(lm_rescale_base, nostalgia_base_sent_rescale ~ .)

lm_rescale_svm <- update(lm_rescale_base, nostalgia_svm_rescale ~ .)

lm_rescale_bert <- update(lm_rescale_base, nostalgia_bert_rescale ~ .)


# labels for the goodness-of-fit rows (also used for Table 2)
# NOTE(review): custom.gof.names is applied by position -- confirm that
# texreg reports the grouping factors in the order parties, elections,
# countries
gofnames <- c("AIC", "BIC", 
              "Log Likelihood",
              "N",
              "N Groups: Parties", 
              "N Groups: Elections",
              "N Groups: Countries")

# Table 01 ----
# models in column order
models_tab01 <- list(lm_rescale_base, 
                     lm_rescale_emb,
                     lm_rescale_sent, 
                     lm_rescale_emb_sent,
                     lm_rescale_svm,
                     lm_rescale_bert)

# column spanners
header_tab01 <- list("Dictionary-Based Methods (M1--M4)" = 1:4, 
                     "Machine Learning (M5--M6)" = 5:6)

# coefficients to report and their labels
coef_map_tab01 <- list(
    "(Intercept)" = "(Intercept)",
    "loglibcons" = "Cultural Conservatism",
    "cabinet_status_lag2Government" = "Government",
    "vote_share_cmp" = "Vote Share",
    "unemp_lag1" = "Unemployment (t-1)"
)

# caption shared by the LaTeX and the HTML version
caption_tab01 <- paste0(
    "Predicting nostalgia for various measurements with standardized dependent variables (mean of 0 and standard deviation of 1). Linear mixed-effects models with random intercepts for countries, parties, and elections. Standard errors in parentheses. ",
    "\n",
    "       M1: Base dictionary; M2: Base dictionary + embeddings dictionary; M3: Base dictionary + positive sentiment; M4: Base dictionary + embeddings dictionary + positive sentiment; M5: Bag-of-words classifier (SVM); M6: Transformer-based classifier (DistilBERT)."
)

# quick look in the console
screenreg(models_tab01)

# save as tex (M6 is set in bold)
texreg(models_tab01,
       custom.header = header_tab01,
       caption.above = TRUE,
       custom.coef.map = coef_map_tab01,
       include.variance = FALSE,
       custom.model.names = c(paste0("M", 1:5), "\\textbf{M6}"),
       custom.gof.names = gofnames,
       caption = caption_tab01,
       fontsize = "footnotesize",
       label = "tab:main_rescaled",
       file = "tab_01.tex")

# save as html
htmlreg(models_tab01,
        custom.header = header_tab01,
        caption.above = TRUE,
        custom.coef.map = coef_map_tab01,
        include.variance = FALSE,
        custom.model.names = paste0("M", 1:6),
        custom.gof.names = gofnames,
        caption = caption_tab01,
        fontsize = "footnotesize",
        label = "tab:main_rescaled",
        file = "tab_01.html")




# models for various ideology measures, reported in Table 2
# all models share the controls (cabinet status, vote share, lagged
# unemployment) and random intercepts for elections, countries, parties

# dictionary measure (unstandardized); this model is fitted here but is
# not part of the Table 2 model list in this script
lm_base_loglibcons <- lmer(
    nostalgia_sentences_per_1000 ~
        loglibcons + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# DistilBERT measure ~ cultural conservatism
lm_base_bert_loglibcons <- lmer(
    nostalgia_sentences_per_1000_bert ~
        loglibcons + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# DistilBERT measure ~ economic left-right position
lm_base_bert_stateconomy <- lmer(
    nostalgia_sentences_per_1000_bert ~
        stateconomy + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# DistilBERT measure ~ both ideology dimensions
lm_base_bert_loglibcons_stateeconomy <- lmer(
    nostalgia_sentences_per_1000_bert ~
        loglibcons + 
        stateconomy +
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# DistilBERT measure ~ party family
lm_base_bert_partyfam <- lmer(
    nostalgia_sentences_per_1000_bert ~
        party_family_recoded + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# DistilBERT measure ~ populism dummy + party family
lm_base_bert_populism <- lmer(
    nostalgia_sentences_per_1000_bert ~
        populist_dummy + 
        party_family_recoded + 
        cabinet_status_lag2 +
        vote_share_cmp +
        unemp_lag1 + 
        (1 | election_id) + (1 | countryname) + (1 | party),
    data = dat_reg
)

# Table 02 ----
# models in column order
models_tab02 <- list(lm_base_bert_loglibcons,
                     lm_base_bert_stateconomy,
                     lm_base_bert_loglibcons_stateeconomy,
                     lm_base_bert_partyfam,
                     lm_base_bert_populism)

# coefficients to report, their order, and their labels
coef_map_tab02 <- list(
    "(Intercept)" = "(Intercept)",
    "cabinet_status_lag2Government" = "Government",
    "vote_share_cmp" = "Vote Share",
    "unemp_lag1" = "Unemployment (t-1)",
    "loglibcons" = "Cultural Conservatism",
    "stateconomy" = "Economic Left-Right Position",
    "party_family_recodedChristian Democratic" = "Party Family: Christian Dem. (ref.: Nationalist)",
    "party_family_recodedConservative" = "Party Family: Conservative",
    "party_family_recodedEcological" = "Party Family: Ecological",
    "party_family_recodedLiberal" = "Party Family: Liberal",
    "party_family_recodedOther" = "Party Family: Other",
    "party_family_recodedSocial Democratic" = "Party Family: Social Dem.",
    "party_family_recodedSocialist" = "Party Family: Socialist",
    "populist_dummyPopulist Party" = "Populist Party"
)

# caption pieces; the two output formats differ only in one blank
# after the first sentence
caption_tab02_first <- "Predicting nostalgic sentences (per 1,000 sentences; DistilBERT measure) using various measures of party ideology."
caption_tab02_rest <- "\n       Linear mixed-effects models with random intercepts for countries, parties, and elections. Standard errors in parentheses."

# save as tex file
texreg(models_tab02,
       caption.above = TRUE,
       include.variance = FALSE,
       custom.model.names = paste0("M", 1:5),
       custom.coef.map = coef_map_tab02,
       custom.gof.names = gofnames,
       caption = paste0(caption_tab02_first, caption_tab02_rest),
       fontsize = "footnotesize",
       label = "tab:base_ideology",
       file = "tab_02.tex")

# save as html file
htmlreg(models_tab02,
        caption.above = TRUE,
        include.variance = FALSE,
        custom.model.names = paste0("M", 1:5),
        custom.coef.map = coef_map_tab02,
        custom.gof.names = gofnames,
        caption = paste0(caption_tab02_first, " ", caption_tab02_rest),
        fontsize = "footnotesize",
        label = "tab:base_ideology",
        file = "tab_02.html")

